{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 猎聘数据挖掘"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>\n",
       "/* 本作业讲义使用之CSS */\n",
       "div.code_cell {\n",
       "    background-color: #e5f1fe;\n",
       "}\n",
       "div.cell.selected {\n",
       "    background-color: #effee2;\n",
       "    font-size: 2rem;\n",
       "    line-height: 2.4rem;\n",
       "}\n",
       "div.cell.selected .rendered_html table {\n",
       "    font-size: 2rem !important;\n",
       "    line-height: 2.4rem !important;\n",
       "}\n",
       ".rendered_html pre code {\n",
       "    background-color: #C4E4ff;   \n",
       "    padding: 2px 25px;\n",
       "}\n",
       ".rendered_html pre {\n",
       "    background-color: #99c9ff;\n",
       "}\n",
       "div.code_cell .CodeMirror {\n",
       "    font-size: 2rem !important;\n",
       "    line-height: 2.4rem !important;\n",
       "}\n",
       ".rendered_html img, .rendered_html svg {\n",
       "    max-width: 60%;\n",
       "    height: auto;\n",
       "    float: right;\n",
       "}\n",
       "\n",
       ".rendered_html img[src*=\"#full\"], .rendered_html svg[src*=\"#full\"] {\n",
       "    max-width: 100%;\n",
       "    height: auto;\n",
       "    float: none;\n",
       "}\n",
       "\n",
       ".rendered_html img[src*=\"#thumbnail\"], .rendered_html svg[src*=\"#thumbnail\"] {\n",
       "    max-width: 15%;\n",
       "    height: auto;\n",
       "}\n",
       "\n",
       "/* Gradient transparent - color - transparent */\n",
       "hr {\n",
       "    border: 0;\n",
       "    border-bottom: 1px dashed #ccc;\n",
       "}\n",
       ".emoticon{\n",
       "    font-size: 5rem;\n",
       "    line-height: 4.4rem;\n",
       "    text-align: center;\n",
       "    vertical-align: middle;\n",
       "}\n",
       ".bg-split_apply_comine {\n",
       "    width: 500px;     \n",
       "    height: 300px;\n",
       "    background: url('02_split-apply-comine_500x300.png') -10px -10px;\n",
       "    float: right;\n",
       "}\n",
       ".bg-comine {\n",
       "    width: 175px;\n",
       "    height: 150px;\n",
       "    background: url('02_split-apply-comine_500x300.png') -280px -80px;\n",
       "    float: right;\n",
       "}\n",
       ".bg-apply {\n",
       "    width: 155px;\n",
       "    height: 225px;\n",
       "    background: url('02_split-apply-comine_500x300.png') -160px -30px;\n",
       "    float: right;\n",
       "}\n",
       ".bg-split {\n",
       "    width: 205px;\n",
       "    height: 225px;\n",
       "    background: url('02_split-apply-comine_500x300.png') -10px -30px;\n",
       "    float: right;\n",
       "}\n",
       ".break {\n",
       "                   page-break-after: right; \n",
       "                   width:700px;\n",
       "                   clear:both;\n",
       "}\n",
       "</style>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%%html\n",
    "<style>\n",
    "/* CSS used by this assignment handout */\n",
    "/* Code cells: light-blue background. */\n",
    "div.code_cell {\n",
    "    background-color: #e5f1fe;\n",
    "}\n",
    "/* Selected cell: light-green background and enlarged text. */\n",
    "div.cell.selected {\n",
    "    background-color: #effee2;\n",
    "    font-size: 2rem;\n",
    "    line-height: 2.4rem;\n",
    "}\n",
    "/* Tables rendered inside the selected cell use the same enlarged text. */\n",
    "div.cell.selected .rendered_html table {\n",
    "    font-size: 2rem !important;\n",
    "    line-height: 2.4rem !important;\n",
    "}\n",
    "/* Rendered code blocks: blue pre background with a lighter, padded code element inside. */\n",
    ".rendered_html pre code {\n",
    "    background-color: #C4E4ff;   \n",
    "    padding: 2px 25px;\n",
    "}\n",
    ".rendered_html pre {\n",
    "    background-color: #99c9ff;\n",
    "}\n",
    "/* Editor text inside code cells is enlarged as well. */\n",
    "div.code_cell .CodeMirror {\n",
    "    font-size: 2rem !important;\n",
    "    line-height: 2.4rem !important;\n",
    "}\n",
    "/* Rendered images and SVGs: at most 60% wide and floated right by default. */\n",
    ".rendered_html img, .rendered_html svg {\n",
    "    max-width: 60%;\n",
    "    height: auto;\n",
    "    float: right;\n",
    "}\n",
    "\n",
    "/* Sources containing #full: up to full width and not floated. */\n",
    ".rendered_html img[src*=\"#full\"], .rendered_html svg[src*=\"#full\"] {\n",
    "    max-width: 100%;\n",
    "    height: auto;\n",
    "    float: none;\n",
    "}\n",
    "\n",
    "/* Sources containing #thumbnail: at most 15% wide. */\n",
    ".rendered_html img[src*=\"#thumbnail\"], .rendered_html svg[src*=\"#thumbnail\"] {\n",
    "    max-width: 15%;\n",
    "    height: auto;\n",
    "}\n",
    "\n",
    "/* Horizontal rules: default border removed, dashed light-grey bottom line only. */\n",
    "hr {\n",
    "    border: 0;\n",
    "    border-bottom: 1px dashed #ccc;\n",
    "}\n",
    "/* Emoticons: large, centered text. */\n",
    ".emoticon{\n",
    "    font-size: 5rem;\n",
    "    line-height: 4.4rem;\n",
    "    text-align: center;\n",
    "    vertical-align: middle;\n",
    "}\n",
    "/* Sprite classes: each one shows a fixed-size region of\n",
    "   02_split-apply-comine_500x300.png, selected by the background offset, and is\n",
    "   floated right.\n",
    "   NOTE(review): comine looks like a typo for combine; it is left unchanged here\n",
    "   because the class names and the image file name are runtime identifiers. */\n",
    ".bg-split_apply_comine {\n",
    "    width: 500px;     \n",
    "    height: 300px;\n",
    "    background: url('02_split-apply-comine_500x300.png') -10px -10px;\n",
    "    float: right;\n",
    "}\n",
    ".bg-comine {\n",
    "    width: 175px;\n",
    "    height: 150px;\n",
    "    background: url('02_split-apply-comine_500x300.png') -280px -80px;\n",
    "    float: right;\n",
    "}\n",
    ".bg-apply {\n",
    "    width: 155px;\n",
    "    height: 225px;\n",
    "    background: url('02_split-apply-comine_500x300.png') -160px -30px;\n",
    "    float: right;\n",
    "}\n",
    ".bg-split {\n",
    "    width: 205px;\n",
    "    height: 225px;\n",
    "    background: url('02_split-apply-comine_500x300.png') -10px -30px;\n",
    "    float: right;\n",
    "}\n",
    "/* Break helper: page-break-after right, 700px wide, clears preceding floats. */\n",
    ".break {\n",
    "                   page-break-after: right; \n",
    "                   width:700px;\n",
    "                   clear:both;\n",
    "}\n",
    "</style>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from requests_html import HTMLSession\n",
    "from urllib.parse import urlparse, parse_qs\n",
    "import time\n",
    "from random import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Liepin job-search URL for the keyword PRD; it seeds the filter links parsed below.\n",
    "url = \"https://www.liepin.com/zhaopin/?keyword=PRD\"\n",
    "# One shared session; `session` and the response `r` are read by later cells.\n",
    "session = HTMLSession()\n",
    "r = session.get( url )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def requests_liepin(url, params):\n",
    "    \"\"\"Fetch one Liepin search-result page and return its job cards as a DataFrame.\n",
    "\n",
    "    Args:\n",
    "        url: Search URL to request through the module-level ``session``.\n",
    "        params: Query-string parameters passed on to ``session.get``.\n",
    "\n",
    "    Returns:\n",
    "        pandas.DataFrame with one row per ``li`` under ``ul.sojob-list`` and one\n",
    "        column per key of ``dict_xpaths``. A field that cannot be found in a\n",
    "        card is returned as an empty string.\n",
    "    \"\"\"\n",
    "    # Send the caller's params (not whatever a global `payload` happens to hold).\n",
    "    r = session.get(url, params=params)\n",
    "    主要元素 = r.html.xpath('//ul[@class=\"sojob-list\"]/li')\n",
    "\n",
    "    # Column name -> XPath, grouped by the way the value is extracted.\n",
    "    dict_xpaths = {\n",
    "        'text': {\n",
    "            'edu':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]',\n",
    "            '经验':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]/following-sibling::span',\n",
    "            '薪水':    '//div[contains(@class,\"job-info\")]/p/span[@class=\"text-warning\"]',\n",
    "            '时间':    '//div[contains(@class,\"job-info\")]/p/time/@title',\n",
    "            '职称':    '//div[contains(@class,\"job-info\")]/h3/a',\n",
    "            '公司地点': '//div[contains(@class,\"job-info\")]/p/a',\n",
    "            '公司名称': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a',\n",
    "        },\n",
    "        'text_content': {\n",
    "        },\n",
    "        'href': {\n",
    "            '链结':    '//div[contains(@class,\"job-info\")]/h3/a',\n",
    "            '公司URL': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a',\n",
    "        }\n",
    "    }\n",
    "\n",
    "    def get_e_text_content(_xpath_):\n",
    "        \"\"\"Full text content of the first match in each card (\"\" when there is none).\"\"\"\n",
    "        暂存结果 = []\n",
    "        for e in 主要元素:\n",
    "            hit = e.xpath(_xpath_, first=True)\n",
    "            暂存结果.append(hit.lxml.text_content() if hit is not None else \"\")\n",
    "        return 暂存结果\n",
    "\n",
    "    def get_e_text(_xpath_):\n",
    "        \"\"\"Concatenated, stripped text of every match in each card.\"\"\"\n",
    "        # A match is a plain string for attribute/text() paths and an element otherwise.\n",
    "        return [\n",
    "            \"\".join(x.strip() if isinstance(x, str) else x.text.strip() for x in e.xpath(_xpath_))\n",
    "            for e in 主要元素\n",
    "        ]\n",
    "\n",
    "    def get_e_href(_xpath_):\n",
    "        \"\"\"One absolute link from the first match in each card (\"\" when there is none).\"\"\"\n",
    "        暂存结果 = []\n",
    "        for e in 主要元素:\n",
    "            # Evaluate the XPath once per card and tolerate cards without a match.\n",
    "            hit = e.xpath(_xpath_, first=True)\n",
    "            links = hit.absolute_links if hit is not None else ()\n",
    "            暂存结果.append(next(iter(links), \"\"))\n",
    "        return 暂存结果\n",
    "\n",
    "    # Every XPath is evaluated relative to one job card, never the whole page.\n",
    "    extractors = {\n",
    "        'text_content': get_e_text_content,\n",
    "        'text': get_e_text,\n",
    "        'href': get_e_href,\n",
    "    }\n",
    "    数据字典 = {}\n",
    "    for kind in ('text_content', 'text', 'href'):\n",
    "        数据字典.update({k: extractors[kind](v) for k, v in dict_xpaths[kind].items()})\n",
    "\n",
    "    return pd.DataFrame(数据字典)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 公司"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 解析公司URL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'中国500强': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=155&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '2018互联网300强': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=182&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '制造业500强': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=186&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " 'AI创新成长50强 ': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=189&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '独角兽': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=130&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '上市公司': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=156&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232'}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Hot-company tag anchors: search-conditions panel -> first dd after the\n",
    "# search title -> anchors inside the hot-comp-tags container.\n",
    "公司标签元素 = r.html.xpath('//div[@data-selector=\"search-conditions\"]')[0] \\\n",
    "                    .xpath('//dt[@class=\"search-title\"]/following-sibling::dd')[0] \\\n",
    "                    .xpath('//div[contains(@class,\"hot-comp-tags\")]/a')\n",
    "\n",
    "# Map each tag label to its relative href. Labels are stripped so that padding\n",
    "# in the page text does not end up in dictionary keys and exported columns.\n",
    "公司数据选择器链结 = {\n",
    "    x.xpath(\"a/text()\")[0].strip(): x.xpath(\"a/@href\")[0]\n",
    "    for x in 公司标签元素\n",
    "}\n",
    "公司数据选择器链结"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=155&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=182&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=186&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=189&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=130&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=156&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232', fragment='')]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Split every hot-company href into its URL components.\n",
    "list(map(urlparse, 公司数据选择器链结.values()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 6 entries, 0 to 5\n",
      "Data columns (total 6 columns):\n",
      " #   Column    Non-Null Count  Dtype \n",
      "---  ------    --------------  ----- \n",
      " 0   scheme    6 non-null      object\n",
      " 1   netloc    6 non-null      object\n",
      " 2   path      6 non-null      object\n",
      " 3   params    6 non-null      object\n",
      " 4   query     6 non-null      object\n",
      " 5   fragment  6 non-null      object\n",
      "dtypes: object(6)\n",
      "memory usage: 416.0+ bytes\n",
      "scheme      1\n",
      "netloc      1\n",
      "path        1\n",
      "params      1\n",
      "query       6\n",
      "fragment    1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>scheme</th>\n",
       "      <th>netloc</th>\n",
       "      <th>path</th>\n",
       "      <th>params</th>\n",
       "      <th>query</th>\n",
       "      <th>fragment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=d404d4cc0884e887&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=d404d4cc0884e887&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=d404d4cc0884e887&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=d404d4cc0884e887&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=d404d4cc0884e887&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  scheme netloc       path params  \\\n",
       "0                /zhaopin/          \n",
       "1                /zhaopin/          \n",
       "2                /zhaopin/          \n",
       "3                /zhaopin/          \n",
       "4                /zhaopin/          \n",
       "\n",
       "                                               query fragment  \n",
       "0  init=-1&headckid=d404d4cc0884e887&flushckid=1&...           \n",
       "1  init=-1&headckid=d404d4cc0884e887&flushckid=1&...           \n",
       "2  init=-1&headckid=d404d4cc0884e887&flushckid=1&...           \n",
       "3  init=-1&headckid=d404d4cc0884e887&flushckid=1&...           \n",
       "4  init=-1&headckid=d404d4cc0884e887&flushckid=1&...           "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Tabulate the parsed URL components, one row per hot-company link.\n",
    "df = pd.DataFrame(list(map(urlparse, 公司数据选择器链结.values())))\n",
    "df.info()\n",
    "# Distinct values per component: shows which part of the URL varies between links.\n",
    "print(df.nunique())\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "init             1\n",
      "headckid         1\n",
      "flushckid        1\n",
      "fromSearchBtn    1\n",
      "keyword          1\n",
      "compTag          6\n",
      "ckid             1\n",
      "siTag            1\n",
      "d_sfrom          1\n",
      "d_ckId           1\n",
      "d_curPage        1\n",
      "d_pageSize       1\n",
      "d_headId         1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>keyword</th>\n",
       "      <th>compTag</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>PRD</td>\n",
       "      <td>155</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>PRD</td>\n",
       "      <td>182</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>PRD</td>\n",
       "      <td>186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>PRD</td>\n",
       "      <td>189</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>PRD</td>\n",
       "      <td>130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>PRD</td>\n",
       "      <td>156</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  keyword compTag\n",
       "0     PRD     155\n",
       "1     PRD     182\n",
       "2     PRD     186\n",
       "3     PRD     189\n",
       "4     PRD     130\n",
       "5     PRD     156"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Expand each query string into columns; parse_qs yields a list per key, so\n",
    "# only the first value of each key is kept.\n",
    "df_qs = pd.DataFrame([{k: v[0] for k, v in parse_qs(x).items()} for x in df['query']])\n",
    "# Distinct values per query parameter.\n",
    "print(df_qs.nunique())\n",
    "df_qs[['keyword','compTag']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 创建公司模板字典"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['PRD'], 'compTag': ['155'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}\n",
      "{'中国500强': '155', '2018互联网300强': '182', '制造业500强': '186', 'AI创新成长50强 ': '189', '独角兽': '130', '上市公司': '156'}\n"
     ]
    }
   ],
   "source": [
    "def parse_url_qs_for_fenlei(url):\n",
    "    \"\"\"Return the query string of ``url`` as a dict mapping each key to a list of values.\"\"\"\n",
    "    return parse_qs(urlparse(url).query)\n",
    "\n",
    "\n",
    "# The query parameters of the first hot-company link serve as the request template.\n",
    "参数模板 = parse_url_qs_for_fenlei([*公司数据选择器链结.values()][0])\n",
    "print(参数模板)\n",
    "\n",
    "# Tag label -> compTag code taken from that link's query string.\n",
    "字典_compTag = dict(\n",
    "    (label, parse_url_qs_for_fenlei(href)['compTag'][0])\n",
    "    for label, href in 公司数据选择器链结.items()\n",
    ")\n",
    "print (字典_compTag)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'中国500强': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['互联网设计'], 'compTag': ['155'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '2018互联网300强': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['互联网设计'], 'compTag': ['182'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '制造业500强': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['互联网设计'], 'compTag': ['186'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, 'AI创新成长50强 ': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['互联网设计'], 'compTag': ['189'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '独角兽': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['互联网设计'], 'compTag': ['130'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 
'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '上市公司': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['互联网设计'], 'compTag': ['156'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}}\n"
     ]
    }
   ],
   "source": [
    "def 参数模板生成(compTag , keyword ):\n",
    "    \"\"\"Return a shallow copy of the module-level 参数模板 with compTag and keyword replaced.\n",
    "\n",
    "    The template itself is left unmodified.\n",
    "    \"\"\"\n",
    "    return {**参数模板, 'compTag': compTag, 'keyword': keyword}\n",
    "\n",
    "# One request payload per hot-company tag, each with its own compTag code.\n",
    "参数_compTag_互联网设计 = dict(\n",
    "    (label, 参数模板生成(compTag = [code], keyword = ['互联网设计']))\n",
    "    for label, code in 字典_compTag.items()\n",
    ")\n",
    "print(参数_compTag_互联网设计)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `url` already carries a `keyword=PRD` query, and requests appends `params` to an\n",
    "# existing query string, so using it as-is would send two `keyword` values.\n",
    "# Request the bare endpoint so each payload supplies the only keyword.\n",
    "base_url = urlparse(url)._replace(query='').geturl()\n",
    "\n",
    "list_df = list()\n",
    "for k, v in 参数_compTag_互联网设计.items():\n",
    "    # Kept as a module-level name for any code that reads the global `payload`.\n",
    "    payload = v\n",
    "    df = requests_liepin(base_url, params=payload)\n",
    "    # Tag every row with the hot-company category it was fetched for.\n",
    "    df = df.assign(热门公司类型=k)\n",
    "    list_df.append(df)\n",
    "\n",
    "# ignore_index gives the combined table one continuous row index instead of\n",
    "# repeating each page's own row labels.\n",
    "df_all_compTag = pd.concat(list_df, ignore_index=True)\n",
    "\n",
    "df_all_compTag.to_excel(\"Web数据挖掘_猎聘_互联网设计_公司分类.xlsx\", sheet_name=\"公司分类\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 行业\n",
    "## 解析行业URL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'互联网/电商': '/zhaopin/?subIndustry=&init=-1&industryType=industry_01&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=040&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '游戏产业': '/zhaopin/?subIndustry=&init=-1&industryType=industry_01&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=420&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '计算机软件': '/zhaopin/?subIndustry=&init=-1&industryType=industry_01&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=010&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " 'IT服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_01&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=030&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '电子/芯片/半导体': '/zhaopin/?subIndustry=&init=-1&industryType=industry_02&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=050&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '通信业': '/zhaopin/?subIndustry=&init=-1&industryType=industry_02&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=060&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '计算机/网络设备': '/zhaopin/?subIndustry=&init=-1&industryType=industry_02&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=020&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '房地产/建筑': '/zhaopin/?subIndustry=&init=-1&industryType=industry_03&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=080&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '规划/设计/装潢': '/zhaopin/?subIndustry=&init=-1&industryType=industry_03&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=100&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '房地产服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_03&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=090&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '银行': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=130&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '保险': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=140&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '基金/证券/投资': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=150&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '会计/审计': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=430&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '信托/担保/拍卖': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=500&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '快消品': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=190&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '批发零售': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=240&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '服装纺织': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=200&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '家具/家电': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=210&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '办公设备': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=220&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '奢侈品/收藏品': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=460&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '珠宝/玩具/工艺品': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=470&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '汽车/摩托车': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=350&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '机械/机电/重工': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=360&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '印刷/包装/造纸': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=180&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '原材料加工': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=370&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '仪器/电气/自动化': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=340&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '制药/生物工程': '/zhaopin/?subIndustry=&init=-1&industryType=industry_10&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=270&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '医疗/保健/美容': '/zhaopin/?subIndustry=&init=-1&industryType=industry_10&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=280&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '医疗器械': '/zhaopin/?subIndustry=&init=-1&industryType=industry_10&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=290&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '能源/水利': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=330&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '石油/化工': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=310&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '采掘/冶炼/矿产': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=320&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '环保': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=300&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '新能源': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=490&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '专业服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=120&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '中介服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=110&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '外包服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=440&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '检测/认证': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=450&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '餐饮/酒旅/服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=230&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '文体娱乐': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=260&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '租赁服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=510&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '广告/市场/会展': '/zhaopin/?subIndustry=&init=-1&industryType=industry_08&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=070&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '影视文化': '/zhaopin/?subIndustry=&init=-1&industryType=industry_08&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=170&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '教育培训': '/zhaopin/?subIndustry=&init=-1&industryType=industry_08&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=380&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '交通/物流/运输': '/zhaopin/?subIndustry=&init=-1&industryType=industry_09&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=250&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '贸易/进出口': '/zhaopin/?subIndustry=&init=-1&industryType=industry_09&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=160&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '航空/航天': '/zhaopin/?subIndustry=&init=-1&industryType=industry_09&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=480&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '政务/公共服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_12&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=390&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '农林牧渔': '/zhaopin/?subIndustry=&init=-1&industryType=industry_12&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=410&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '其他行业': '/zhaopin/?subIndustry=&init=-1&industryType=industry_12&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&industries=400&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232'}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build a {link text: href} mapping for the sub-industry filter links.\n",
    "# The nested query narrows step by step: the search-conditions panel, then the\n",
    "# second match of the dd-following-a-search-title query, then the <a> elements\n",
    "# directly under any <div> whose class contains sub-industry.\n",
    "# NOTE(review): index [1] presumably selects the industry row of the filter\n",
    "# panel -- confirm against the live page layout.\n",
    "行业数据选择器链结 = {\n",
    "    anchor.xpath(\"a/text()\")[0]: anchor.xpath(\"a/@href\")[0]\n",
    "    for anchor in (\n",
    "        r.html.xpath('//div[@data-selector=\"search-conditions\"]')[0]\n",
    "        .xpath('//dt[@class=\"search-title\"]/following-sibling::dd')[1]\n",
    "        .xpath('//div[contains(@class,\"sub-industry\")]/a')\n",
    "    )\n",
    "}\n",
    "行业数据选择器链结"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 51 entries, 0 to 50\n",
      "Data columns (total 6 columns):\n",
      " #   Column    Non-Null Count  Dtype \n",
      "---  ------    --------------  ----- \n",
      " 0   scheme    51 non-null     object\n",
      " 1   netloc    51 non-null     object\n",
      " 2   path      51 non-null     object\n",
      " 3   params    51 non-null     object\n",
      " 4   query     51 non-null     object\n",
      " 5   fragment  51 non-null     object\n",
      "dtypes: object(6)\n",
      "memory usage: 2.5+ KB\n",
      "scheme       1\n",
      "netloc       1\n",
      "path         1\n",
      "params       1\n",
      "query       51\n",
      "fragment     1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>scheme</th>\n",
       "      <th>netloc</th>\n",
       "      <th>path</th>\n",
       "      <th>params</th>\n",
       "      <th>query</th>\n",
       "      <th>fragment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>subIndustry=&amp;init=-1&amp;industryType=industry_01&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>subIndustry=&amp;init=-1&amp;industryType=industry_01&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>subIndustry=&amp;init=-1&amp;industryType=industry_01&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>subIndustry=&amp;init=-1&amp;industryType=industry_01&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>subIndustry=&amp;init=-1&amp;industryType=industry_02&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  scheme netloc       path params  \\\n",
       "0                /zhaopin/          \n",
       "1                /zhaopin/          \n",
       "2                /zhaopin/          \n",
       "3                /zhaopin/          \n",
       "4                /zhaopin/          \n",
       "\n",
       "                                               query fragment  \n",
       "0  subIndustry=&init=-1&industryType=industry_01&...           \n",
       "1  subIndustry=&init=-1&industryType=industry_01&...           \n",
       "2  subIndustry=&init=-1&industryType=industry_01&...           \n",
       "3  subIndustry=&init=-1&industryType=industry_01&...           \n",
       "4  subIndustry=&init=-1&industryType=industry_02&...           "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Split every industry link into its URL components; each urlparse() result\n",
    "# becomes one row with the columns scheme/netloc/path/params/query/fragment.\n",
    "df = pd.DataFrame(list(map(urlparse, 行业数据选择器链结.values())))\n",
    "df.info()\n",
    "# Count distinct values per component to see which parts vary between links.\n",
    "print(df.nunique())\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "init              1\n",
      "industryType     12\n",
      "headckid          1\n",
      "flushckid         1\n",
      "fromSearchBtn     1\n",
      "industries       51\n",
      "keyword           1\n",
      "ckid              1\n",
      "siTag             1\n",
      "d_sfrom           1\n",
      "d_ckId            1\n",
      "d_curPage         1\n",
      "d_pageSize        1\n",
      "d_headId          1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>keyword</th>\n",
       "      <th>industries</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>PRD</td>\n",
       "      <td>040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>PRD</td>\n",
       "      <td>420</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>PRD</td>\n",
       "      <td>010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>PRD</td>\n",
       "      <td>030</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>PRD</td>\n",
       "      <td>050</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>PRD</td>\n",
       "      <td>060</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>PRD</td>\n",
       "      <td>020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>PRD</td>\n",
       "      <td>080</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>PRD</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>PRD</td>\n",
       "      <td>090</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>PRD</td>\n",
       "      <td>130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>PRD</td>\n",
       "      <td>140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>PRD</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>PRD</td>\n",
       "      <td>430</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>PRD</td>\n",
       "      <td>500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>PRD</td>\n",
       "      <td>190</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>PRD</td>\n",
       "      <td>240</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>PRD</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>PRD</td>\n",
       "      <td>210</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>PRD</td>\n",
       "      <td>220</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>PRD</td>\n",
       "      <td>460</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>PRD</td>\n",
       "      <td>470</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>PRD</td>\n",
       "      <td>350</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>PRD</td>\n",
       "      <td>360</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>PRD</td>\n",
       "      <td>180</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>PRD</td>\n",
       "      <td>370</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>PRD</td>\n",
       "      <td>340</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>PRD</td>\n",
       "      <td>270</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>PRD</td>\n",
       "      <td>280</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>PRD</td>\n",
       "      <td>290</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>PRD</td>\n",
       "      <td>330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>PRD</td>\n",
       "      <td>310</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>PRD</td>\n",
       "      <td>320</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>PRD</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>PRD</td>\n",
       "      <td>490</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>PRD</td>\n",
       "      <td>120</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>PRD</td>\n",
       "      <td>110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>PRD</td>\n",
       "      <td>440</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>PRD</td>\n",
       "      <td>450</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>PRD</td>\n",
       "      <td>230</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>PRD</td>\n",
       "      <td>260</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>PRD</td>\n",
       "      <td>510</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>PRD</td>\n",
       "      <td>070</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>PRD</td>\n",
       "      <td>170</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>PRD</td>\n",
       "      <td>380</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>PRD</td>\n",
       "      <td>250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>PRD</td>\n",
       "      <td>160</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>PRD</td>\n",
       "      <td>480</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>PRD</td>\n",
       "      <td>390</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>PRD</td>\n",
       "      <td>410</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>PRD</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   keyword industries\n",
       "0      PRD        040\n",
       "1      PRD        420\n",
       "2      PRD        010\n",
       "3      PRD        030\n",
       "4      PRD        050\n",
       "5      PRD        060\n",
       "6      PRD        020\n",
       "7      PRD        080\n",
       "8      PRD        100\n",
       "9      PRD        090\n",
       "10     PRD        130\n",
       "11     PRD        140\n",
       "12     PRD        150\n",
       "13     PRD        430\n",
       "14     PRD        500\n",
       "15     PRD        190\n",
       "16     PRD        240\n",
       "17     PRD        200\n",
       "18     PRD        210\n",
       "19     PRD        220\n",
       "20     PRD        460\n",
       "21     PRD        470\n",
       "22     PRD        350\n",
       "23     PRD        360\n",
       "24     PRD        180\n",
       "25     PRD        370\n",
       "26     PRD        340\n",
       "27     PRD        270\n",
       "28     PRD        280\n",
       "29     PRD        290\n",
       "30     PRD        330\n",
       "31     PRD        310\n",
       "32     PRD        320\n",
       "33     PRD        300\n",
       "34     PRD        490\n",
       "35     PRD        120\n",
       "36     PRD        110\n",
       "37     PRD        440\n",
       "38     PRD        450\n",
       "39     PRD        230\n",
       "40     PRD        260\n",
       "41     PRD        510\n",
       "42     PRD        070\n",
       "43     PRD        170\n",
       "44     PRD        380\n",
       "45     PRD        250\n",
       "46     PRD        160\n",
       "47     PRD        480\n",
       "48     PRD        390\n",
       "49     PRD        410\n",
       "50     PRD        400"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Expand each query string into one row of flat parameters. parse_qs returns a\n",
    "# list of values per key, so only the first value of each list is kept. It also\n",
    "# skips blank values by default, so the empty `subIndustry=` never becomes a column.\n",
    "df_qs = pd.DataFrame(\n",
    "    [{key: values[0] for key, values in parse_qs(query).items()} for query in df['query']]\n",
    ")\n",
    "# Distinct values per parameter: shows which parameters differ between links.\n",
    "print(df_qs.nunique())\n",
    "# Only the last expression of a cell is displayed, so a bare `df_qs.head()` placed\n",
    "# before this line showed nothing and has been removed.\n",
    "df_qs[['keyword','industries']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 创建行业模板字典"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['040'], 'keyword': ['PRD'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}\n",
      "{'互联网/电商': '040', '游戏产业': '420', '计算机软件': '010', 'IT服务': '030', '电子/芯片/半导体': '050', '通信业': '060', '计算机/网络设备': '020', '房地产/建筑': '080', '规划/设计/装潢': '100', '房地产服务': '090', '银行': '130', '保险': '140', '基金/证券/投资': '150', '会计/审计': '430', '信托/担保/拍卖': '500', '快消品': '190', '批发零售': '240', '服装纺织': '200', '家具/家电': '210', '办公设备': '220', '奢侈品/收藏品': '460', '珠宝/玩具/工艺品': '470', '汽车/摩托车': '350', '机械/机电/重工': '360', '印刷/包装/造纸': '180', '原材料加工': '370', '仪器/电气/自动化': '340', '制药/生物工程': '270', '医疗/保健/美容': '280', '医疗器械': '290', '能源/水利': '330', '石油/化工': '310', '采掘/冶炼/矿产': '320', '环保': '300', '新能源': '490', '专业服务': '120', '中介服务': '110', '外包服务': '440', '检测/认证': '450', '餐饮/酒旅/服务': '230', '文体娱乐': '260', '租赁服务': '510', '广告/市场/会展': '070', '影视文化': '170', '教育培训': '380', '交通/物流/运输': '250', '贸易/进出口': '160', '航空/航天': '480', '政务/公共服务': '390', '农林牧渔': '410', '其他行业': '400'}\n"
     ]
    }
   ],
   "source": [
    "# Use the parsed query parameters of the first industry link as the request template.\n",
    "# NOTE(review): parse_url_qs_for_fenlei is defined in another cell; judging by the\n",
    "# printed result it yields a parse_qs-style dict of lists -- confirm there.\n",
    "参数模板 = parse_url_qs_for_fenlei(list(行业数据选择器链结.values())[0])\n",
    "print(参数模板)\n",
    "\n",
    "# Map every industry name to the first `industries` value found in its link.\n",
    "字典_industries = {\n",
    "    name: parse_url_qs_for_fenlei(href)['industries'][0]\n",
    "    for name, href in 行业数据选择器链结.items()\n",
    "}\n",
    "print(字典_industries)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'互联网/电商': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['040'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '游戏产业': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['420'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '计算机软件': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['010'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, 'IT服务': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['030'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '电子/芯片/半导体': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['050'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 
'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '通信业': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['060'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '计算机/网络设备': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['020'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '房地产/建筑': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['080'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '规划/设计/装潢': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['100'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': 
['44f3110ca52e44604c6e815ef1dd3232']}, '房地产服务': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['090'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '银行': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['130'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '保险': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['140'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '基金/证券/投资': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['150'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '会计/审计': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['430'], 'keyword': ['互联网设计'], 
'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '信托/担保/拍卖': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['500'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '快消品': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['190'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '批发零售': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['240'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '服装纺织': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['200'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': 
['44f3110ca52e44604c6e815ef1dd3232']}, '家具/家电': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['210'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '办公设备': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['220'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '奢侈品/收藏品': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['460'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '珠宝/玩具/工艺品': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['470'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '汽车/摩托车': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['350'], 'keyword': 
['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '机械/机电/重工': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['360'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '印刷/包装/造纸': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['180'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '原材料加工': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['370'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '仪器/电气/自动化': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['340'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': 
['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '制药/生物工程': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['270'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '医疗/保健/美容': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['280'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '医疗器械': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['290'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '能源/水利': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['330'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '石油/化工': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['310'], 
'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '采掘/冶炼/矿产': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['320'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '环保': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['300'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '新能源': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['490'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '专业服务': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['120'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': 
['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '中介服务': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['110'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '外包服务': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['440'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '检测/认证': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['450'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '餐饮/酒旅/服务': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['230'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '文体娱乐': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['260'], 
'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '租赁服务': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['510'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '广告/市场/会展': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['070'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '影视文化': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['170'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '教育培训': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['380'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 
'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '交通/物流/运输': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['250'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '贸易/进出口': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['160'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '航空/航天': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['480'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '政务/公共服务': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['390'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '农林牧渔': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 
'industries': ['410'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '其他行业': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['400'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}}\n"
     ]
    }
   ],
   "source": [
    "def 参数模板生成(industries, keyword):\n",
    "    \"\"\"Return a copy of the global 参数模板 with two entries replaced.\n",
    "\n",
    "    The copy is made with 参数模板.copy(), so the template object itself is\n",
    "    not modified; entries other than 'industries' and 'keyword' are carried\n",
    "    over as-is.\n",
    "\n",
    "    Args:\n",
    "        industries: value stored under the 'industries' key.\n",
    "        keyword: value stored under the 'keyword' key.\n",
    "\n",
    "    Returns:\n",
    "        The copied mapping with both keys set.\n",
    "    \"\"\"\n",
    "    merged = 参数模板.copy()\n",
    "    merged.update(industries=industries, keyword=keyword)\n",
    "    return merged\n",
    "\n",
    "\n",
    "# One parameter set per entry of 字典_industries: the entry's value goes in\n",
    "# as a one-element 'industries' list, with the search keyword held fixed.\n",
    "参数_industries_互联网设计 = {\n",
    "    name: 参数模板生成(industries=[code], keyword=['互联网设计'])\n",
    "    for name, code in 字典_industries.items()\n",
    "}\n",
    "print(参数_industries_互联网设计)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Query every parameter set, tag the result with its dict key in a 行业\n",
    "# column, and collect the pieces for a single concat below.\n",
    "# requests_liepin and url are defined elsewhere in the notebook; the result\n",
    "# is used here as a DataFrame (it must support .assign and pd.concat).\n",
    "list_df = []\n",
    "for k, v in 参数_industries_互联网设计.items():\n",
    "    payload = v\n",
    "    df = requests_liepin(url, params=payload).assign(行业=k)\n",
    "    list_df.append(df)\n",
    "\n",
    "df_all_industries = pd.concat(list_df)\n",
    "df_all_industries.index.name = \"序\"\n",
    "df_all_industries.to_excel(\n",
    "    \"Web数据挖掘_猎聘_互联网设计_行业.xlsx\",\n",
    "    sheet_name=\"行业分类\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 地区\n",
    "## 解析地区URL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'北京': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&dqs=010&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '上海': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&dqs=020&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '广州': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&dqs=050020&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '深圳': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&dqs=050090&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '天津': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&dqs=030&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '苏州': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&dqs=060080&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '重庆': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&dqs=040&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '南京': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&dqs=060020&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '杭州': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&dqs=070020&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '大连': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&dqs=210040&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '成都': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&dqs=280020&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232',\n",
       " '武汉': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&flushckid=1&fromSearchBtn=2&dqs=170020&keyword=PRD&ckid=d404d4cc0884e887&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232'}"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inside the first search-conditions <div>, take the third <dd> matched as a\n",
    "# following sibling of a search-title <dt>, then keep only the anchors whose\n",
    "# href starts with /zhaopin.\n",
    "地区数据选择器链结 = (\n",
    "    r.html.xpath('//div[@data-selector=\"search-conditions\"]')[0]\n",
    "    .xpath('//dt[@class=\"search-title\"]/following-sibling::dd')[2]\n",
    "    .xpath('//a[starts-with(@href,\"/zhaopin\")]')\n",
    ")\n",
    "\n",
    "# Map each anchor's text to its href, then drop the '全国' entry.\n",
    "地区数据选择器链结 = dict(\n",
    "    (x.xpath(\"a/text()\")[0], x.xpath(\"a/@href\")[0])\n",
    "    for x in 地区数据选择器链结\n",
    ")\n",
    "del 地区数据选择器链结['全国']\n",
    "地区数据选择器链结"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 12 entries, 0 to 11\n",
      "Data columns (total 6 columns):\n",
      " #   Column    Non-Null Count  Dtype \n",
      "---  ------    --------------  ----- \n",
      " 0   scheme    12 non-null     object\n",
      " 1   netloc    12 non-null     object\n",
      " 2   path      12 non-null     object\n",
      " 3   params    12 non-null     object\n",
      " 4   query     12 non-null     object\n",
      " 5   fragment  12 non-null     object\n",
      "dtypes: object(6)\n",
      "memory usage: 704.0+ bytes\n",
      "scheme       1\n",
      "netloc       1\n",
      "path         1\n",
      "params       1\n",
      "query       12\n",
      "fragment     1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>scheme</th>\n",
       "      <th>netloc</th>\n",
       "      <th>path</th>\n",
       "      <th>params</th>\n",
       "      <th>query</th>\n",
       "      <th>fragment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=d404d4cc0884e887&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=d404d4cc0884e887&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=d404d4cc0884e887&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=d404d4cc0884e887&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=d404d4cc0884e887&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  scheme netloc       path params  \\\n",
       "0                /zhaopin/          \n",
       "1                /zhaopin/          \n",
       "2                /zhaopin/          \n",
       "3                /zhaopin/          \n",
       "4                /zhaopin/          \n",
       "\n",
       "                                               query fragment  \n",
       "0  init=-1&headckid=d404d4cc0884e887&flushckid=1&...           \n",
       "1  init=-1&headckid=d404d4cc0884e887&flushckid=1&...           \n",
       "2  init=-1&headckid=d404d4cc0884e887&flushckid=1&...           \n",
       "3  init=-1&headckid=d404d4cc0884e887&flushckid=1&...           \n",
       "4  init=-1&headckid=d404d4cc0884e887&flushckid=1&...           "
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Split every region href into its URL components, one row per link, then\n",
    "# show the frame summary, the distinct-value count per column, and the\n",
    "# first five rows.\n",
    "df = pd.DataFrame(list(map(urlparse, 地区数据选择器链结.values())))\n",
    "df.info()\n",
    "print(df.nunique())\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "init              1\n",
      "headckid          1\n",
      "flushckid         1\n",
      "fromSearchBtn     1\n",
      "dqs              12\n",
      "keyword           1\n",
      "ckid              1\n",
      "siTag             1\n",
      "d_sfrom           1\n",
      "d_ckId            1\n",
      "d_curPage         1\n",
      "d_pageSize        1\n",
      "d_headId          1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>keyword</th>\n",
       "      <th>dqs</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>PRD</td>\n",
       "      <td>010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>PRD</td>\n",
       "      <td>020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>PRD</td>\n",
       "      <td>050020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>PRD</td>\n",
       "      <td>050090</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>PRD</td>\n",
       "      <td>030</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>PRD</td>\n",
       "      <td>060080</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>PRD</td>\n",
       "      <td>040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>PRD</td>\n",
       "      <td>060020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>PRD</td>\n",
       "      <td>070020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>PRD</td>\n",
       "      <td>210040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>PRD</td>\n",
       "      <td>280020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>PRD</td>\n",
       "      <td>170020</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   keyword     dqs\n",
       "0      PRD     010\n",
       "1      PRD     020\n",
       "2      PRD  050020\n",
       "3      PRD  050090\n",
       "4      PRD     030\n",
       "5      PRD  060080\n",
       "6      PRD     040\n",
       "7      PRD  060020\n",
       "8      PRD  070020\n",
       "9      PRD  210040\n",
       "10     PRD  280020\n",
       "11     PRD  170020"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_qs = pd.DataFrame([{k:v[0] for k,v in parse_qs(x).items()} for x in df['query'] ])\n",
    "print (df_qs.nunique())\n",
    "df_qs.head()\n",
    "df_qs[['keyword','dqs']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 创建地区模板字典"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'dqs': ['020'], 'keyword': ['PRD'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}\n",
      "{'北京': '010', '上海': '020', '广州': '050020', '深圳': '050090', '天津': '030', '苏州': '060080', '重庆': '040', '南京': '060020', '杭州': '070020', '大连': '210040', '成都': '280020', '武汉': '170020'}\n"
     ]
    }
   ],
   "source": [
    "参数模板 = parse_url_qs_for_fenlei(list( 地区数据选择器链结.values())[1])\n",
    "print(参数模板)\n",
    "\n",
    "\n",
    "字典_dqs= { k:parse_url_qs_for_fenlei(v)['dqs'][0] for k,v in 地区数据选择器链结.items()}\n",
    "print (字典_dqs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'北京': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'dqs': ['010'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '上海': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'dqs': ['020'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '广州': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'dqs': ['050020'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '深圳': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'dqs': ['050090'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '天津': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'dqs': ['030'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': 
['44f3110ca52e44604c6e815ef1dd3232']}, '苏州': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'dqs': ['060080'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '重庆': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'dqs': ['040'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '南京': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'dqs': ['060020'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '杭州': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'dqs': ['070020'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '大连': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'dqs': ['210040'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': 
['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '成都': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'dqs': ['280020'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}, '武汉': {'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'dqs': ['170020'], 'keyword': ['互联网设计'], 'ckid': ['d404d4cc0884e887'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232']}}\n"
     ]
    }
   ],
   "source": [
    "def 参数模板生成(dqs, keyword ):\n",
    "    参数 = 参数模板.copy()\n",
    "    参数['dqs'] = dqs\n",
    "    参数['keyword'] = keyword\n",
    "    return (参数)\n",
    "\n",
    "参数_dqs_互联网设计 = { k:参数模板生成(dqs = [v], keyword = ['互联网设计']) for k,v in 字典_dqs.items()}\n",
    "print(参数_dqs_互联网设计)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "list_df = list()\n",
    "for k,v in 参数_dqs_互联网设计.items():\n",
    "    payload = v\n",
    "    df = requests_liepin( url, params = payload)\n",
    "    df = df.assign (dqs = k)    \n",
    "    list_df.append(df)\n",
    "\n",
    "df_all_dqs = pd.concat(list_df)\n",
    "df_all_dqs\n",
    "df_all_dqs.to_excel(\"Web数据挖掘_猎聘_互联网设计_地区分类.xlsx\", sheet_name=\"地区分类\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 翻页模板"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'2': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&fromSearchBtn=2&keyword=PRD&ckid=d404d4cc0884e887°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232&curPage=1', '3': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&fromSearchBtn=2&keyword=PRD&ckid=d404d4cc0884e887°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232&curPage=2', '4': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&fromSearchBtn=2&keyword=PRD&ckid=d404d4cc0884e887°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232&curPage=3', '5': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&fromSearchBtn=2&keyword=PRD&ckid=d404d4cc0884e887°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232&curPage=4', '下一页': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&fromSearchBtn=2&keyword=PRD&ckid=d404d4cc0884e887°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232&curPage=1', '': '/zhaopin/?init=-1&headckid=d404d4cc0884e887&fromSearchBtn=2&keyword=PRD&ckid=d404d4cc0884e887°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=44f3110ca52e44604c6e815ef1dd3232&d_curPage=0&d_pageSize=40&d_headId=44f3110ca52e44604c6e815ef1dd3232&curPage=9'}\n"
     ]
    }
   ],
   "source": [
    "xpath_翻页a = '//div[@class=\"pagerbar\"]/a[starts-with(@href,\"/zhaopin\")]'\n",
    "href_列表 = [x.xpath('//@href')[0] for x in r.html.xpath(xpath_翻页a)]\n",
    "文字_列表 = [x.text for x in r.html.xpath(xpath_翻页a)]\n",
    "\n",
    "href_字典 = {x.text:x.xpath('//@href')[0]  for x in r.html.xpath(xpath_翻页a)}\n",
    "print (href_字典)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>scheme</th>\n",
       "      <th>netloc</th>\n",
       "      <th>path</th>\n",
       "      <th>params</th>\n",
       "      <th>query</th>\n",
       "      <th>fragment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=d404d4cc0884e887&amp;fromSearchBt...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=d404d4cc0884e887&amp;fromSearchBt...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=d404d4cc0884e887&amp;fromSearchBt...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=d404d4cc0884e887&amp;fromSearchBt...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=d404d4cc0884e887&amp;fromSearchBt...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=d404d4cc0884e887&amp;fromSearchBt...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  scheme netloc       path params  \\\n",
       "0                /zhaopin/          \n",
       "1                /zhaopin/          \n",
       "2                /zhaopin/          \n",
       "3                /zhaopin/          \n",
       "4                /zhaopin/          \n",
       "5                /zhaopin/          \n",
       "\n",
       "                                               query fragment  \n",
       "0  init=-1&headckid=d404d4cc0884e887&fromSearchBt...           \n",
       "1  init=-1&headckid=d404d4cc0884e887&fromSearchBt...           \n",
       "2  init=-1&headckid=d404d4cc0884e887&fromSearchBt...           \n",
       "3  init=-1&headckid=d404d4cc0884e887&fromSearchBt...           \n",
       "4  init=-1&headckid=d404d4cc0884e887&fromSearchBt...           \n",
       "5  init=-1&headckid=d404d4cc0884e887&fromSearchBt...           "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "scheme      1\n",
      "netloc      1\n",
      "path        1\n",
      "params      1\n",
      "query       5\n",
      "fragment    1\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "df = pd.DataFrame([ urlparse(x) for x in href_列表])\n",
    "df_qs = pd.DataFrame([{k:v[0] for k,v in parse_qs(x).items()} for x in df['query'] ])\n",
    "\n",
    "display(df)\n",
    "print(df.nunique())\n",
    "df_qs.curPage\n",
    "df_qs = df_qs.assign (curPage_int=df_qs.curPage.astype(int)) # 变成整数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'init': ['-1'], 'headckid': ['d404d4cc0884e887'], 'fromSearchBtn': ['2'], 'keyword': ['PRD'], 'ckid': ['d404d4cc0884e887°radeFlag=0'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['44f3110ca52e44604c6e815ef1dd3232'], 'curPage': ['1']}\n"
     ]
    }
   ],
   "source": [
    "def parse_url_qs_for_curPage (url):\n",
    "    six_parts = urlparse(url) \n",
    "    out = parse_qs(six_parts.query)\n",
    "    return (out)\n",
    "\n",
    "# 取一例做模板\n",
    "参数模板 = parse_url_qs_for_curPage(href_列表[0])\n",
    "print (参数模板)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0: {'init': ['-1'],\n",
       "  'headckid': ['d404d4cc0884e887'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': '互联网设计',\n",
       "  'ckid': ['d404d4cc0884e887°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'curPage': [0],\n",
       "  'compTag': None,\n",
       "  'dqs': None,\n",
       "  'industries': None},\n",
       " 1: {'init': ['-1'],\n",
       "  'headckid': ['d404d4cc0884e887'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': '互联网设计',\n",
       "  'ckid': ['d404d4cc0884e887°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'curPage': [1],\n",
       "  'compTag': None,\n",
       "  'dqs': None,\n",
       "  'industries': None},\n",
       " 2: {'init': ['-1'],\n",
       "  'headckid': ['d404d4cc0884e887'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': '互联网设计',\n",
       "  'ckid': ['d404d4cc0884e887°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'curPage': [2],\n",
       "  'compTag': None,\n",
       "  'dqs': None,\n",
       "  'industries': None},\n",
       " 3: {'init': ['-1'],\n",
       "  'headckid': ['d404d4cc0884e887'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': '互联网设计',\n",
       "  'ckid': ['d404d4cc0884e887°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'curPage': [3],\n",
       "  'compTag': None,\n",
       "  'dqs': None,\n",
       "  'industries': None},\n",
       " 4: {'init': ['-1'],\n",
       "  'headckid': ['d404d4cc0884e887'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': '互联网设计',\n",
       "  'ckid': ['d404d4cc0884e887°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'curPage': [4],\n",
       "  'compTag': None,\n",
       "  'dqs': None,\n",
       "  'industries': None},\n",
       " 5: {'init': ['-1'],\n",
       "  'headckid': ['d404d4cc0884e887'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': '互联网设计',\n",
       "  'ckid': ['d404d4cc0884e887°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'curPage': [5],\n",
       "  'compTag': None,\n",
       "  'dqs': None,\n",
       "  'industries': None},\n",
       " 6: {'init': ['-1'],\n",
       "  'headckid': ['d404d4cc0884e887'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': '互联网设计',\n",
       "  'ckid': ['d404d4cc0884e887°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'curPage': [6],\n",
       "  'compTag': None,\n",
       "  'dqs': None,\n",
       "  'industries': None},\n",
       " 7: {'init': ['-1'],\n",
       "  'headckid': ['d404d4cc0884e887'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': '互联网设计',\n",
       "  'ckid': ['d404d4cc0884e887°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'curPage': [7],\n",
       "  'compTag': None,\n",
       "  'dqs': None,\n",
       "  'industries': None},\n",
       " 8: {'init': ['-1'],\n",
       "  'headckid': ['d404d4cc0884e887'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': '互联网设计',\n",
       "  'ckid': ['d404d4cc0884e887°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'curPage': [8],\n",
       "  'compTag': None,\n",
       "  'dqs': None,\n",
       "  'industries': None},\n",
       " 9: {'init': ['-1'],\n",
       "  'headckid': ['d404d4cc0884e887'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': '互联网设计',\n",
       "  'ckid': ['d404d4cc0884e887°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['44f3110ca52e44604c6e815ef1dd3232'],\n",
       "  'curPage': [9],\n",
       "  'compTag': None,\n",
       "  'dqs': None,\n",
       "  'industries': None}}"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def 参数模板生成(keyword,curPage,compTag=None,dqs=None,industries=None):\n",
    "    参数 = 参数模板.copy()\n",
    "    参数['curPage'] = curPage\n",
    "    参数['keyword'] = keyword\n",
    "    参数['compTag'] = compTag\n",
    "    参数['dqs'] = dqs\n",
    "    参数['industries'] = industries\n",
    "    return (参数)\n",
    "\n",
    "\n",
    "参数_curPage = { \n",
    "    i:参数模板生成(curPage = [i],keyword=\"互联网设计\")\\\n",
    "    for i in range(0,df_qs.curPage_int.max()+1)\\\n",
    "    }\n",
    "参数_curPage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "list_df = list()\n",
    "for k,v in 参数_curPage.items():\n",
    "    payload = v\n",
    "    df = requests_liepin( url, params = payload)\n",
    "    df = df.assign (页数 = k)    \n",
    "    list_df.append(df)\n",
    "\n",
    "df_all_翻页 = pd.concat(list_df)\n",
    "df_all_翻页\n",
    "\n",
    "df_all_翻页.to_excel(\"Web数据挖掘_猎聘_互联网设计_翻页.xlsx\", sheet_name=\"关键词翻页\")\n",
    "\n",
    "# print (df_all.nunique())\n",
    "# df_all[['edu']].drop_duplicates()\n",
    "\n",
    "# df_all.groupby(['公司名称','edu']).agg({\"职称\":\"count\"}).sort_values(by='职称', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'155': '中国500强', '182': '2018互联网300强', '186': '制造业500强', '189': 'AI创新成长50强 ', '130': '独角兽', '156': '上市公司'}\n"
     ]
    }
   ],
   "source": [
    "# compTag_all = []\n",
    "# for k,v in 字典_compTag.items():\n",
    "#      compTag_all.append(v)\n",
    "# compTag_all\n",
    "# 公司字典 = dict(zip(字典_compTag.values(), 字典_compTag.keys()))\n",
    "# # print(公司字典)\n",
    "\n",
    "# dqs_all =[]\n",
    "# for k,v in 字典_dqs.items():\n",
    "#     dqs_all.append(v)\n",
    "# # dqs_all\n",
    "# 地区字典 = dict(zip(字典_dqs.values(), 字典_dqs.keys()))\n",
    "# # print(地区字典)\n",
    "\n",
    "# industries_all =[]\n",
    "# for k,v in 字典_industries.items():\n",
    "#     industries_all.append(v)\n",
    "# # industries_all\n",
    "# 行业字典 = dict(zip(字典_industries.values(), 字典_industries.keys()))\n",
    "# # print(行业字典)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10 155\n",
      "10 182\n",
      "10 186\n",
      "10 189\n",
      "10 130\n",
      "10 156\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-70-67a1ed61fdce>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m     25\u001b[0m         \u001b[0mpayload\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mv\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     26\u001b[0m         \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mrequests_liepin\u001b[0m\u001b[1;33m(\u001b[0m \u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpayload\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 27\u001b[1;33m         \u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;36m4\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0mrandom\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m  \u001b[1;31m#放慢脚步 3-7秒, 平均约5秒\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     28\u001b[0m         \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0massign\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0m热门公司类型\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mitem\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     29\u001b[0m \u001b[1;31m#         df = df.assign (地区 = key)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "# list_df = list()\n",
    "# for item in compTag_all:\n",
    "# #     for key in dqs_all:\n",
    "# #         for a in industries_all:\n",
    "#             payload = 参数模板生成(compTag=[item], curPage=['0'],keyword=\"互联网设计\")\n",
    "#             df = requests_liepin( url, params = payload)\n",
    "#             href_列表 = [x.xpath('//@href')[0] for x in r.html.xpath(xpath_翻页a)]\n",
    "#             df = pd.DataFrame([ urlparse(x) for x in href_列表])\n",
    "#             df_qs = pd.DataFrame([{k:v[0] for k,v in parse_qs(x).items()} for x in df['query'] ])\n",
    "#             df_qs = df_qs.assign (curPage_int=df_qs.curPage.astype(int)) # 变成整数\n",
    "#             长度 = df_qs.curPage_int.max()+1\n",
    "#             参数_keyword_X = { \n",
    "#                 i:参数模板生成(curPage = [i], \\\n",
    "#                               compTag = [item],\\\n",
    "#                               keyword=\"互联网设计\",\\\n",
    "# #                               dqs=[key],\\\n",
    "# #                             industries=[a]\\\n",
    "#                                             ) \\\n",
    "#                 for i in range(0,长度)\\\n",
    "#                 }\n",
    "# #             print (参数_keyword_X)\n",
    "#             print (长度,item)\n",
    "    \n",
    "# for k,v in 参数_keyword_X.items():\n",
    "#         payload = v\n",
    "#         df = requests_liepin( url, params = payload)\n",
    "#         time.sleep(3+4*random())  #放慢脚步 3-7秒, 平均约5秒\n",
    "#         df = df.assign (热门公司类型 = item)  \n",
    "# #         df = df.assign (地区 = key)\n",
    "# #         df = df.assign (行业 = a)\n",
    "#         df = df.assign (curPage = k)  # 区分  curPage    \n",
    "#         list_df.append(df)\n",
    "        \n",
    "# df_all = pd.concat(list_df).reset_index()\n",
    "# df_all = df_all.set_index(\"热门公司类型\").rename(index=公司字典).reset_index()\n",
    "# # df_all = df_all.set_index(\"地区\").rename(index=地区字典).reset_index()\n",
    "# # df_all = df_all.set_index(\"行业\").rename(index=行业字典).reset_index()\n",
    "# df_all.index.name = '序'\n",
    "# df_all.to_excel(\"Web_Mining_week06_公司翻页.xlsx\",\\\n",
    "#                 sheet_name=\"互联网设计\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
